//===--- PredictableMemOpt.cpp - Perform predictable memory optzns --------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "predictable-memopt"

#include "polarphp/pil/optimizer/internal/mandatory/PMOMemoryUseCollector.h"
#include "polarphp/basic/BlotSetVector.h"
#include "polarphp/basic/StlExtras.h"
#include "polarphp/pil/lang/BasicBlockUtils.h"
#include "polarphp/pil/lang/BranchPropagatedUser.h"
#include "polarphp/pil/lang/OwnershipUtils.h"
#include "polarphp/pil/lang/PILBuilder.h"
#include "polarphp/pil/optimizer/passmgr/Passes.h"
#include "polarphp/pil/optimizer/passmgr/Transforms.h"
#include "polarphp/pil/optimizer/utils/CFGOptUtils.h"
#include "polarphp/pil/optimizer/utils/InstOptUtils.h"
#include "polarphp/pil/optimizer/utils/PILSSAUpdater.h"
#include "polarphp/pil/optimizer/utils/ValueLifetime.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

using namespace polar;

STATISTIC(NumLoadPromoted, "Number of loads promoted");
STATISTIC(NumLoadTakePromoted, "Number of load takes promoted");
STATISTIC(NumDestroyAddrPromoted, "Number of destroy_addrs promoted");
STATISTIC(NumAllocRemoved, "Number of allocations completely removed");

//===----------------------------------------------------------------------===//
//                            Subelement Analysis
//===----------------------------------------------------------------------===//

// We can only analyze components of structs whose storage is fully accessible
// from Swift.
static StructDecl *
getFullyReferenceableStruct(PILType Ty) {
   auto SD = Ty.getStructOrBoundGenericStruct();
   if (!SD || SD->hasUnreferenceableStorage())
      return nullptr;
   return SD;
}

static unsigned getNumSubElements(PILType T, PILModule &M,
                                  TypeExpansionContext context) {

   if (auto TT = T.getAs<TupleType>()) {
      unsigned NumElements = 0;
      for (auto index : indices(TT.getElementTypes()))
         NumElements +=
            getNumSubElements(T.getTupleElementType(index), M, context);
      return NumElements;
   }

   if (auto *SD = getFullyReferenceableStruct(T)) {
      unsigned NumElements = 0;
      for (auto *D : SD->getStoredProperties())
         NumElements +=
            getNumSubElements(T.getFieldType(D, M, context), M, context);
      return NumElements;
   }

   // If this isn't a tuple or struct, it is a single element.
   return 1;
}

/// getAccessPathRoot - Given an address, dive through any tuple/struct element
/// addresses to get the underlying value.
static PILValue getAccessPathRoot(PILValue pointer) {
   while (true) {
      if (auto *TEAI = dyn_cast<TupleElementAddrInst>(pointer)) {
         pointer = TEAI->getOperand();
         continue;
      }

      if (auto *SEAI = dyn_cast<StructElementAddrInst>(pointer)) {
         pointer = SEAI->getOperand();
         continue;
      }

      if (auto *BAI = dyn_cast<BeginAccessInst>(pointer)) {
         pointer = BAI->getSource();
         continue;
      }

      return pointer;
   }
}

/// Compute the subelement number indicated by the specified pointer (which is
/// derived from the root by a series of tuple/struct element addresses) by
/// treating the type as a linearized namespace with sequential elements.  For
/// example, given:
///
///   root = alloc { a: { c: i64, d: i64 }, b: (i64, i64) }
///   tmp1 = struct_element_addr root, 1
///   tmp2 = tuple_element_addr tmp1, 0
///
/// This will return a subelement number of 2.
///
/// If this pointer is to within an existential projection, it returns ~0U.
static unsigned computeSubelement(PILValue Pointer,
                                  SingleValueInstruction *RootInst) {
   unsigned SubElementNumber = 0;
   PILModule &M = RootInst->getModule();

   while (1) {
      // If we got to the root, we're done.
      if (RootInst == Pointer)
         return SubElementNumber;

      if (auto *PBI = dyn_cast<ProjectBoxInst>(Pointer)) {
         Pointer = PBI->getOperand();
         continue;
      }

      if (auto *BAI = dyn_cast<BeginAccessInst>(Pointer)) {
         Pointer = BAI->getSource();
         continue;
      }

      if (auto *TEAI = dyn_cast<TupleElementAddrInst>(Pointer)) {
         PILType TT = TEAI->getOperand()->getType();

         // Keep track of what subelement is being referenced.
         for (unsigned i = 0, e = TEAI->getFieldNo(); i != e; ++i) {
            SubElementNumber +=
               getNumSubElements(TT.getTupleElementType(i), M,
                                 TypeExpansionContext(*RootInst->getFunction()));
         }
         Pointer = TEAI->getOperand();
         continue;
      }

      if (auto *SEAI = dyn_cast<StructElementAddrInst>(Pointer)) {
         PILType ST = SEAI->getOperand()->getType();

         // Keep track of what subelement is being referenced.
         StructDecl *SD = SEAI->getStructDecl();
         for (auto *D : SD->getStoredProperties()) {
            if (D == SEAI->getField()) break;
            auto context = TypeExpansionContext(*RootInst->getFunction());
            SubElementNumber +=
               getNumSubElements(ST.getFieldType(D, M, context), M, context);
         }

         Pointer = SEAI->getOperand();
         continue;
      }

      // This fails when we visit unchecked_take_enum_data_addr. We should just
      // add support for enums.
      assert(isa<InitExistentialAddrInst>(Pointer) &&
             "Unknown access path instruction");
      // Cannot promote loads and stores from within an existential projection.
      return ~0U;
   }
}

//===----------------------------------------------------------------------===//
//                              Available Value
//===----------------------------------------------------------------------===//

namespace {

class AvailableValueAggregator;

struct AvailableValue {
   friend class AvailableValueAggregator;

   PILValue Value;
   unsigned SubElementNumber;

   /// If this gets too expensive in terms of copying, we can use an arena and a
   /// FrozenPtrSet like we do in ARC.
   SmallSetVector<StoreInst *, 1> InsertionPoints;

   /// Just for updating.
   SmallVectorImpl<PMOMemoryUse> *Uses;

public:
   AvailableValue() = default;

   /// Main initializer for available values.
   ///
   /// *NOTE* We assume that all available values start with a singular insertion
   /// point and insertion points are added by merging.
   AvailableValue(PILValue Value, unsigned SubElementNumber,
                  StoreInst *InsertPoint)
      : Value(Value), SubElementNumber(SubElementNumber), InsertionPoints() {
      InsertionPoints.insert(InsertPoint);
   }

   /// Deleted copy constructor. This is a move only type.
   AvailableValue(const AvailableValue &) = delete;

   /// Deleted copy operator. This is a move only type.
   AvailableValue &operator=(const AvailableValue &) = delete;

   /// Move constructor.
   AvailableValue(AvailableValue &&Other)
      : Value(nullptr), SubElementNumber(~0), InsertionPoints() {
      std::swap(Value, Other.Value);
      std::swap(SubElementNumber, Other.SubElementNumber);
      std::swap(InsertionPoints, Other.InsertionPoints);
   }

   /// Move operator.
   AvailableValue &operator=(AvailableValue &&Other) {
      std::swap(Value, Other.Value);
      std::swap(SubElementNumber, Other.SubElementNumber);
      std::swap(InsertionPoints, Other.InsertionPoints);
      return *this;
   }

   operator bool() const { return bool(Value); }

   bool operator==(const AvailableValue &Other) const {
      return Value == Other.Value && SubElementNumber == Other.SubElementNumber;
   }

   bool operator!=(const AvailableValue &Other) const {
      return !(*this == Other);
   }

   PILValue getValue() const { return Value; }
   PILType getType() const { return Value->getType(); }
   unsigned getSubElementNumber() const { return SubElementNumber; }
   ArrayRef<StoreInst *> getInsertionPoints() const {
      return InsertionPoints.getArrayRef();
   }

   void mergeInsertionPoints(const AvailableValue &Other) & {
      assert(Value == Other.Value && SubElementNumber == Other.SubElementNumber);
      InsertionPoints.set_union(Other.InsertionPoints);
   }

   void addInsertionPoint(StoreInst *si) & { InsertionPoints.insert(si); }

   AvailableValue emitStructExtract(PILBuilder &B, PILLocation Loc, VarDecl *D,
                                    unsigned SubElementNumber) const {
      PILValue NewValue = B.emitStructExtract(Loc, Value, D);
      return {NewValue, SubElementNumber, InsertionPoints};
   }

   AvailableValue emitTupleExtract(PILBuilder &B, PILLocation Loc,
                                   unsigned EltNo,
                                   unsigned SubElementNumber) const {
      PILValue NewValue = B.emitTupleExtract(Loc, Value, EltNo);
      return {NewValue, SubElementNumber, InsertionPoints};
   }

   AvailableValue emitBeginBorrow(PILBuilder &b, PILLocation loc) const {
      // If we do not have ownership or already are guaranteed, just return a copy
      // of our state.
      if (!b.hasOwnership() || Value.getOwnershipKind().isCompatibleWith(
         ValueOwnershipKind::Guaranteed)) {
         return {Value, SubElementNumber, InsertionPoints};
      }

      // Otherwise, return newValue.
      return {b.createBeginBorrow(loc, Value), SubElementNumber, InsertionPoints};
   }

   void dump() const LLVM_ATTRIBUTE_USED;
   void print(llvm::raw_ostream &os) const;

private:
   /// Private constructor.
   AvailableValue(PILValue Value, unsigned SubElementNumber,
                  const decltype(InsertionPoints) &InsertPoints)
      : Value(Value), SubElementNumber(SubElementNumber),
        InsertionPoints(InsertPoints) {}
};

} // end anonymous namespace

void AvailableValue::dump() const { print(llvm::dbgs()); }

void AvailableValue::print(llvm::raw_ostream &os) const {
   os << "Available Value Dump. Value: ";
   if (getValue()) {
      os << getValue();
   } else {
      os << "NoValue;\n";
   }
   os << "SubElementNumber: " << getSubElementNumber() << "\n";
   os << "Insertion Points:\n";
   for (auto *I : getInsertionPoints()) {
      os << *I;
   }
}

namespace llvm {

llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const AvailableValue &V) {
   V.print(os);
   return os;
}

} // end llvm namespace

//===----------------------------------------------------------------------===//
//                           Subelement Extraction
//===----------------------------------------------------------------------===//

/// Given an aggregate value and an access path, non-destructively extract the
/// value indicated by the path.
static PILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
                                                  PILBuilder &B,
                                                  PILLocation Loc) {
   PILType ValTy = Val.getType();
   unsigned SubElementNumber = Val.SubElementNumber;

   // Extract tuple elements.
   if (auto TT = ValTy.getAs<TupleType>()) {
      for (unsigned EltNo : indices(TT.getElementTypes())) {
         // Keep track of what subelement is being referenced.
         PILType EltTy = ValTy.getTupleElementType(EltNo);
         unsigned NumSubElt = getNumSubElements(
            EltTy, B.getModule(), TypeExpansionContext(B.getFunction()));
         if (SubElementNumber < NumSubElt) {
            auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
            auto NewVal =
               BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
            PILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
            // If our original value wasn't guaranteed and we did actually perform a
            // borrow as a result, insert the end_borrow.
            if (BorrowedVal.getValue() != Val.getValue())
               B.createEndBorrow(Loc, BorrowedVal.getValue());
            return result;
         }

         SubElementNumber -= NumSubElt;
      }

      llvm_unreachable("Didn't find field");
   }

   // Extract struct elements.
   if (auto *SD = getFullyReferenceableStruct(ValTy)) {
      for (auto *D : SD->getStoredProperties()) {
         auto fieldType = ValTy.getFieldType(
            D, B.getModule(), TypeExpansionContext(B.getFunction()));
         unsigned NumSubElt = getNumSubElements(
            fieldType, B.getModule(), TypeExpansionContext(B.getFunction()));

         if (SubElementNumber < NumSubElt) {
            auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
            auto NewVal =
               BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
            PILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
            // If our original value wasn't guaranteed and we did actually perform a
            // borrow as a result, insert the end_borrow.
            if (BorrowedVal.getValue() != Val.getValue())
               B.createEndBorrow(Loc, BorrowedVal.getValue());
            return result;
         }

         SubElementNumber -= NumSubElt;

      }
      llvm_unreachable("Didn't find field");
   }

   // Otherwise, we're down to a scalar. If we have ownership enabled,
   // we return a copy. Otherwise, there we can ignore ownership
   // issues. This is ok since in [ossa] we are going to eliminate a
   // load [copy] or a load [trivial], while in non-[ossa] PIL we will
   // be replacing unqualified loads.
   assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
   if (!B.hasOwnership())
      return Val.getValue();
   return B.emitCopyValueOperation(Loc, Val.getValue());
}

//===----------------------------------------------------------------------===//
//                        Available Value Aggregation
//===----------------------------------------------------------------------===//

static bool anyMissing(unsigned StartSubElt, unsigned NumSubElts,
                       ArrayRef<AvailableValue> &Values) {
   while (NumSubElts) {
      if (!Values[StartSubElt])
         return true;
      ++StartSubElt;
      --NumSubElts;
   }
   return false;
}

namespace {

enum class AvailableValueExpectedOwnership {
   Take,
   Borrow,
   Copy,
};

/// A class that aggregates available values, loading them if they are not
/// available.
class AvailableValueAggregator {
   PILModule &M;
   PILBuilderWithScope B;
   PILLocation Loc;
   MutableArrayRef<AvailableValue> AvailableValueList;
   SmallVectorImpl<PMOMemoryUse> &Uses;
   DeadEndBlocks &deadEndBlocks;
   AvailableValueExpectedOwnership expectedOwnership;

   /// Keep track of all instructions that we have added. Once we are done
   /// promoting a value, we need to make sure that if we need to balance any
   /// copies (to avoid leaks), we do so. This is not used if we are performing a
   /// take.
   SmallVector<PILInstruction *, 16> insertedInsts;

   /// The list of phi nodes inserted by the SSA updater.
   SmallVector<PILPhiArgument *, 16> insertedPhiNodes;

   /// A set of copy_values whose lifetime we balanced while inserting phi
   /// nodes. This means that these copy_value must be skipped in
   /// addMissingDestroysForCopiedValues.
   SmallPtrSet<CopyValueInst *, 16> copyValueProcessedWithPhiNodes;

public:
   AvailableValueAggregator(PILInstruction *Inst,
                            MutableArrayRef<AvailableValue> AvailableValueList,
                            SmallVectorImpl<PMOMemoryUse> &Uses,
                            DeadEndBlocks &deadEndBlocks,
                            AvailableValueExpectedOwnership expectedOwnership)
      : M(Inst->getModule()), B(Inst), Loc(Inst->getLoc()),
        AvailableValueList(AvailableValueList), Uses(Uses),
        deadEndBlocks(deadEndBlocks), expectedOwnership(expectedOwnership) {}

   // This is intended to be passed by reference only once constructed.
   AvailableValueAggregator(const AvailableValueAggregator &) = delete;
   AvailableValueAggregator(AvailableValueAggregator &&) = delete;
   AvailableValueAggregator &
   operator=(const AvailableValueAggregator &) = delete;
   AvailableValueAggregator &operator=(AvailableValueAggregator &&) = delete;

   PILValue aggregateValues(PILType LoadTy, PILValue Address, unsigned FirstElt,
                            bool isTopLevel = true);
   bool canTake(PILType loadTy, unsigned firstElt) const;

   void print(llvm::raw_ostream &os) const;
   void dump() const LLVM_ATTRIBUTE_USED;

   bool isTake() const {
      return expectedOwnership == AvailableValueExpectedOwnership::Take;
   }

   bool isBorrow() const {
      return expectedOwnership == AvailableValueExpectedOwnership::Borrow;
   }

   bool isCopy() const {
      return expectedOwnership == AvailableValueExpectedOwnership::Copy;
   }

   /// Given a load_borrow that we have aggregated a new value for, fixup the
   /// reference counts of the intermediate copies and phis to ensure that all
   /// forwarding operations in the CFG are strongly control equivalent (i.e. run
   /// the same number of times).
   void fixupOwnership(PILInstruction *load, PILValue newVal) {
      assert(isa<LoadBorrowInst>(load) || isa<LoadInst>(load));

      addHandOffCopyDestroysForPhis(load, newVal);
      addMissingDestroysForCopiedValues(load, newVal);
   }

private:
   PILValue aggregateFullyAvailableValue(PILType loadTy, unsigned firstElt);
   PILValue aggregateTupleSubElts(TupleType *tt, PILType loadTy,
                                  PILValue address, unsigned firstElt);
   PILValue aggregateStructSubElts(StructDecl *sd, PILType loadTy,
                                   PILValue address, unsigned firstElt);
   PILValue handlePrimitiveValue(PILType loadTy, PILValue address,
                                 unsigned firstElt);
   bool isFullyAvailable(PILType loadTy, unsigned firstElt) const;


   /// If as a result of us copying values, we may have unconsumed destroys, find
   /// the appropriate location and place the values there. Only used when
   /// ownership is enabled.
   void addMissingDestroysForCopiedValues(PILInstruction *load, PILValue newVal);

   /// As a result of us using the SSA updater, insert hand off copy/destroys at
   /// each phi and make sure that intermediate phis do not leak by inserting
   /// destroys along paths that go through the intermediate phi that do not also
   /// go through the
   void addHandOffCopyDestroysForPhis(PILInstruction *load, PILValue newVal);
};

} // end anonymous namespace

void AvailableValueAggregator::dump() const { print(llvm::dbgs()); }

void AvailableValueAggregator::print(llvm::raw_ostream &os) const {
   os << "Available Value List, N = " << AvailableValueList.size()
      << ". Elts:\n";
   for (auto &V : AvailableValueList) {
      os << V;
   }
}

bool AvailableValueAggregator::isFullyAvailable(PILType loadTy,
                                                unsigned firstElt) const {
   if (firstElt >= AvailableValueList.size()) { // #Elements may be zero.
      return false;
   }

   auto &firstVal = AvailableValueList[firstElt];

   // Make sure that the first element is available and is the correct type.
   if (!firstVal || firstVal.getType() != loadTy)
      return false;

   return llvm::all_of(range(getNumSubElements(
      loadTy, M, TypeExpansionContext(B.getFunction()))),
                       [&](unsigned index) -> bool {
                          auto &val = AvailableValueList[firstElt + index];
                          return val.getValue() == firstVal.getValue() &&
                                 val.getSubElementNumber() == index;
                       });
}

// We can only take if we never have to split a larger value to promote this
// address.
bool AvailableValueAggregator::canTake(PILType loadTy,
                                       unsigned firstElt) const {
   // If we do not have ownership, we can always take since we do not need to
   // keep any ownership invariants up to date. In the future, we should be able
   // to chop up larger values before they are being stored.
   if (!B.hasOwnership())
      return true;

   // If we are trivially fully available, just return true.
   if (isFullyAvailable(loadTy, firstElt))
      return true;

   // Otherwise see if we are an aggregate with fully available leaf types.
   if (TupleType *tt = loadTy.getAs<TupleType>()) {
      return llvm::all_of(indices(tt->getElements()), [&](unsigned eltNo) {
         PILType eltTy = loadTy.getTupleElementType(eltNo);
         unsigned numSubElt =
            getNumSubElements(eltTy, M, TypeExpansionContext(B.getFunction()));
         bool success = canTake(eltTy, firstElt);
         firstElt += numSubElt;
         return success;
      });
   }

   if (auto *sd = getFullyReferenceableStruct(loadTy)) {
      return llvm::all_of(sd->getStoredProperties(), [&](VarDecl *decl) -> bool {
         auto context = TypeExpansionContext(B.getFunction());
         PILType eltTy = loadTy.getFieldType(decl, M, context);
         unsigned numSubElt = getNumSubElements(eltTy, M, context);
         bool success = canTake(eltTy, firstElt);
         firstElt += numSubElt;
         return success;
      });
   }

   // Otherwise, fail. The value is not fully available at its leafs. We can not
   // perform a take.
   return false;
}

/// Given a bunch of primitive subelement values, build out the right aggregate
/// type (LoadTy) by emitting tuple and struct instructions as necessary.
PILValue AvailableValueAggregator::aggregateValues(PILType LoadTy,
                                                   PILValue Address,
                                                   unsigned FirstElt,
                                                   bool isTopLevel) {
   // If we are performing a take, make sure that we have available values for
   // /all/ of our values. Otherwise, bail.
   if (isTopLevel && isTake() && !canTake(LoadTy, FirstElt)) {
      return PILValue();
   }

   // Check to see if the requested value is fully available, as an aggregate.
   // This is a super-common case for single-element structs, but is also a
   // general answer for arbitrary structs and tuples as well.
   if (PILValue Result = aggregateFullyAvailableValue(LoadTy, FirstElt)) {
      return Result;
   }

   // If we have a tuple type, then aggregate the tuple's elements into a full
   // tuple value.
   if (TupleType *tupleType = LoadTy.getAs<TupleType>()) {
      PILValue result =
         aggregateTupleSubElts(tupleType, LoadTy, Address, FirstElt);
      if (isTopLevel &&
          result.getOwnershipKind() == ValueOwnershipKind::Guaranteed) {
         PILValue borrowedResult = result;
         PILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
         result = builder.emitCopyValueOperation(Loc, borrowedResult);
         SmallVector<BorrowScopeIntroducingValue, 4> introducers;
         bool foundIntroducers =
            getUnderlyingBorrowIntroducingValues(borrowedResult, introducers);
         (void)foundIntroducers;
         assert(foundIntroducers);
         for (auto value : introducers) {
            builder.emitEndBorrowOperation(Loc, value.value);
         }
      }
      return result;
   }

   // If we have a struct type, then aggregate the struct's elements into a full
   // struct value.
   if (auto *structDecl = getFullyReferenceableStruct(LoadTy)) {
      PILValue result =
         aggregateStructSubElts(structDecl, LoadTy, Address, FirstElt);
      if (isTopLevel &&
          result.getOwnershipKind() == ValueOwnershipKind::Guaranteed) {
         PILValue borrowedResult = result;
         PILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
         result = builder.emitCopyValueOperation(Loc, borrowedResult);
         SmallVector<BorrowScopeIntroducingValue, 4> introducers;
         bool foundIntroducers =
            getUnderlyingBorrowIntroducingValues(borrowedResult, introducers);
         (void)foundIntroducers;
         assert(foundIntroducers);
         for (auto value : introducers) {
            builder.emitEndBorrowOperation(Loc, value.value);
         }
      }
      return result;
   }

   // Otherwise, we have a non-aggregate primitive. Load or extract the value.
   //
   // NOTE: We should never call this when taking since when taking we know that
   // our underlying value is always fully available.
   assert(!isTake());
   return handlePrimitiveValue(LoadTy, Address, FirstElt);
}

// See if we have this value is fully available. In such a case, return it as an
// aggregate. This is a super-common case for single-element structs, but is
// also a general answer for arbitrary structs and tuples as well.
PILValue
AvailableValueAggregator::aggregateFullyAvailableValue(PILType loadTy,
                                                       unsigned firstElt) {
   // Check if our underlying type is fully available. If it isn't, bail.
   if (!isFullyAvailable(loadTy, firstElt))
      return PILValue();

   // Ok, grab out first value. (note: any actually will do).
   auto &firstVal = AvailableValueList[firstElt];

   // Ok, we know that all of our available values are all parts of the same
   // value. Without ownership, we can just return the underlying first value.
   if (!B.hasOwnership())
      return firstVal.getValue();

   // Otherwise, we need to put in a copy. This is b/c we only propagate along +1
   // values and we are eliminating a load [copy].
   ArrayRef<StoreInst *> insertPts = firstVal.getInsertionPoints();
   if (insertPts.size() == 1) {
      // Use the scope and location of the store at the insertion point.
      PILBuilderWithScope builder(insertPts[0], &insertedInsts);
      PILLocation loc = insertPts[0]->getLoc();
      // If we have a take, just return the value.
      if (isTake())
         return firstVal.getValue();
      // Otherwise, return a copy of the value.
      return builder.emitCopyValueOperation(loc, firstVal.getValue());
   }

   // If we have multiple insertion points, put copies at each point and use the
   // SSA updater to get a value. The reason why this is safe is that we can only
   // have multiple insertion points if we are storing exactly the same value
   // implying that we can just copy firstVal at each insertion point.
   PILSSAUpdater updater(&insertedPhiNodes);
   updater.Initialize(loadTy);

   Optional<PILValue> singularValue;
   for (auto *insertPt : insertPts) {
      // Use the scope and location of the store at the insertion point.
      PILBuilderWithScope builder(insertPt, &insertedInsts);
      PILLocation loc = insertPt->getLoc();
      PILValue eltVal = firstVal.getValue();

      // If we are not taking, copy the element value.
      if (!isTake()) {
         eltVal = builder.emitCopyValueOperation(loc, eltVal);
      }

      if (!singularValue.hasValue()) {
         singularValue = eltVal;
      } else if (*singularValue != eltVal) {
         singularValue = PILValue();
      }

      // And then put the value into the SSA updater.
      updater.AddAvailableValue(insertPt->getParent(), eltVal);
   }

   // If we only are tracking a singular value, we do not need to construct
   // SSA. Just return that value.
   if (auto val = singularValue.getValueOr(PILValue())) {
      // This assert documents that we are expecting that if we are in ossa, have
      // a non-trivial value, and are not taking, we should never go down this
      // code path. If we did, we would need to insert a copy here. The reason why
      // we know we will never go down this code path is since we have been
      // inserting copy_values implying that our potential singular value would be
      // of the copy_values which are guaranteed to all be different.
      assert((!B.hasOwnership() || isTake() ||
              val->getType().isTrivial(*B.getInsertionBB()->getParent())) &&
             "Should never reach this code path if we are in ossa and have a "
             "non-trivial value");
      return val;
   }

   // Finally, grab the value from the SSA updater.
   PILValue result = updater.GetValueInMiddleOfBlock(B.getInsertionBB());
   assert(result.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
   if (isTake() || !B.hasOwnership()) {
      return result;
   }

   // Be careful with this value and insert a copy in our load block to prevent
   // any weird control equivalence issues.
   PILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
   return builder.emitCopyValueOperation(Loc, result);
}

PILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT,
                                                         PILType LoadTy,
                                                         PILValue Address,
                                                         unsigned FirstElt) {
   SmallVector<PILValue, 4> ResultElts;

   for (unsigned EltNo : indices(TT->getElements())) {
      PILType EltTy = LoadTy.getTupleElementType(EltNo);
      unsigned NumSubElt =
         getNumSubElements(EltTy, M, TypeExpansionContext(B.getFunction()));

      // If we are missing any of the available values in this struct element,
      // compute an address to load from.
      PILValue EltAddr;
      if (anyMissing(FirstElt, NumSubElt, AvailableValueList)) {
         assert(!isTake() && "When taking, values should never be missing?!");
         EltAddr =
            B.createTupleElementAddr(Loc, Address, EltNo, EltTy.getAddressType());
      }

      ResultElts.push_back(
         aggregateValues(EltTy, EltAddr, FirstElt, /*isTopLevel*/ false));
      FirstElt += NumSubElt;
   }

   // If we are going to use this to promote a borrowed value, insert borrow
   // operations. Eventually I am going to do this for everything, but this
   // should make it easier to bring up.
   if (!isTake()) {
      for (unsigned i : indices(ResultElts)) {
         ResultElts[i] = B.emitBeginBorrowOperation(Loc, ResultElts[i]);
      }
   }

   return B.createTuple(Loc, LoadTy, ResultElts);
}

PILValue AvailableValueAggregator::aggregateStructSubElts(StructDecl *sd,
                                                          PILType loadTy,
                                                          PILValue address,
                                                          unsigned firstElt) {
   SmallVector<PILValue, 4> resultElts;

   for (auto *decl : sd->getStoredProperties()) {
      auto context = TypeExpansionContext(B.getFunction());
      PILType eltTy = loadTy.getFieldType(decl, M, context);
      unsigned numSubElt = getNumSubElements(eltTy, M, context);

      // If we are missing any of the available values in this struct element,
      // compute an address to load from.
      PILValue eltAddr;
      if (anyMissing(firstElt, numSubElt, AvailableValueList)) {
         assert(!isTake() && "When taking, values should never be missing?!");
         eltAddr =
            B.createStructElementAddr(Loc, address, decl, eltTy.getAddressType());
      }

      resultElts.push_back(
         aggregateValues(eltTy, eltAddr, firstElt, /*isTopLevel*/ false));
      firstElt += numSubElt;
   }

   if (!isTake()) {
      for (unsigned i : indices(resultElts)) {
         resultElts[i] = B.emitBeginBorrowOperation(Loc, resultElts[i]);
      }
   }

   return B.createStruct(Loc, loadTy, resultElts);
}

// We have looked through all of the aggregate values and finally found a value
// that is not available without transforming, i.e. a "primitive value". If the
// value is available, use it (extracting if we need to), otherwise emit a load
// of the value with the appropriate qualifier.
PILValue AvailableValueAggregator::handlePrimitiveValue(PILType loadTy,
                                                        PILValue address,
                                                        unsigned firstElt) {
   assert(!isTake() && "Should only take fully available values?!");

   // If the value is not available, load the value and update our use list.
   auto &val = AvailableValueList[firstElt];
   if (!val) {
      LoadInst *load = ([&]() {
         if (B.hasOwnership()) {
            PILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
            return builder.createTrivialLoadOr(Loc, address,
                                               LoadOwnershipQualifier::Copy);
         }
         return B.createLoad(Loc, address, LoadOwnershipQualifier::Unqualified);
      }());
      Uses.emplace_back(load, PMOUseKind::Load);
      return load;
   }

   // If we have 1 insertion point, just extract the value and return.
   //
   // This saves us from having to spend compile time in the SSA updater in this
   // case.
   ArrayRef<StoreInst *> insertPts = val.getInsertionPoints();
   if (insertPts.size() == 1) {
      // Use the scope and location of the store at the insertion point.
      PILBuilderWithScope builder(insertPts[0], &insertedInsts);
      PILLocation loc = insertPts[0]->getLoc();
      PILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
      assert(
         !builder.hasOwnership() ||
         eltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
      assert(eltVal->getType() == loadTy && "Subelement types mismatch");

      if (!builder.hasOwnership()) {
         return eltVal;
      }

      PILBuilderWithScope builder2(&*B.getInsertionPoint(), &insertedInsts);
      return builder2.emitCopyValueOperation(Loc, eltVal);
   }

   // If we have an available value, then we want to extract the subelement from
   // the borrowed aggregate before each insertion point. Note that since we have
   // inserted copies at each of these insertion points, we know that we will
   // never have the same value along all paths unless we have a trivial value
   // meaning the SSA updater given a non-trivial value must /always/ be used.
   PILSSAUpdater updater(&insertedPhiNodes);
   updater.Initialize(loadTy);

   Optional<PILValue> singularValue;
   for (auto *i : insertPts) {
      // Use the scope and location of the store at the insertion point.
      PILBuilderWithScope builder(i, &insertedInsts);
      PILLocation loc = i->getLoc();
      PILValue eltVal = nonDestructivelyExtractSubElement(val, builder, loc);
      assert(
         !builder.hasOwnership() ||
         eltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));

      if (!singularValue.hasValue()) {
         singularValue = eltVal;
      } else if (*singularValue != eltVal) {
         singularValue = PILValue();
      }

      updater.AddAvailableValue(i->getParent(), eltVal);
   }

   PILBasicBlock *insertBlock = B.getInsertionBB();

   // If we are not in ossa and have a singular value or if we are in ossa and
   // have a trivial singular value, just return that value.
   //
   // This can never happen for non-trivial values in ossa since we never should
   // visit this code path if we have a take implying that non-trivial values
   // /will/ have a copy and thus are guaranteed (since each copy yields a
   // different value) to not be singular values.
   if (auto val = singularValue.getValueOr(PILValue())) {
      assert((!B.hasOwnership() ||
              val->getType().isTrivial(*insertBlock->getParent())) &&
             "Should have inserted copies for each insertion point, so shouldn't "
             "have a singular value if non-trivial?!");
      return val;
   }

   // Finally, grab the value from the SSA updater.
   PILValue eltVal = updater.GetValueInMiddleOfBlock(insertBlock);
   assert(!B.hasOwnership() ||
          eltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
   assert(eltVal->getType() == loadTy && "Subelement types mismatch");
   if (!B.hasOwnership())
      return eltVal;
   PILBuilderWithScope builder(&*B.getInsertionPoint(), &insertedInsts);
   return builder.emitCopyValueOperation(Loc, eltVal);
}

static PILInstruction *getNonPhiBlockIncomingValueDef(PILValue incomingValue,
                                                      CopyValueInst *phiCopy) {
   auto *phiBlock = phiCopy->getParent();
   if (phiBlock == incomingValue->getParentBlock()) {
      return nullptr;
   }

   if (auto *cvi = dyn_cast<CopyValueInst>(incomingValue)) {
      return cvi;
   }

   assert(isa<PILPhiArgument>(incomingValue));

   // Otherwise, our copy_value may not be post-dominated by our phi. To
   // work around that, we need to insert destroys along the other
   // paths. So set base to the first instruction in our argument's block,
   // so we can insert destroys for our base.
   return &*incomingValue->getParentBlock()->begin();
}

static bool
terminatorHasAnyKnownPhis(TermInst *ti,
                          ArrayRef<PILPhiArgument *> insertedPhiNodesSorted) {
   for (auto succArgList : ti->getSuccessorBlockArguments()) {
      if (llvm::any_of(succArgList, [&](PILPhiArgument *arg) {
         return binary_search(insertedPhiNodesSorted, arg);
      })) {
         return true;
      }
   }

   return false;
}

namespace {

class PhiNodeCopyCleanupInserter {
   llvm::SmallMapVector<PILValue, unsigned, 8> incomingValues;

   /// Map from index -> (incomingValueIndex, copy).
   ///
   /// We are going to stable_sort this array using the indices of
   /// incomingValueIndex. This will ensure that we always visit in
   /// insertion order our incoming values (since the indices we are
   /// sorting by are the count of incoming values we have seen so far
   /// when we see the incoming value) and maintain the internal
   /// insertion sort within our range as well. This ensures that we
   /// visit our incoming values in visitation order and that within
   /// their own values, also visit them in visitation order with
   /// respect to each other.
   SmallVector<std::pair<unsigned, CopyValueInst *>, 16> copiesToCleanup;

   /// The lifetime frontier that we use to compute lifetime endpoints
   /// when emitting cleanups.
   ValueLifetimeAnalysis::Frontier lifetimeFrontier;

public:
   PhiNodeCopyCleanupInserter() = default;

   void trackNewCleanup(PILValue incomingValue, CopyValueInst *copy) {
      auto entry = std::make_pair(incomingValue, incomingValues.size());
      auto iter = incomingValues.insert(entry);
      // If we did not succeed, then iter.first.second is the index of
      // incoming value. Otherwise, it will be nextIndex.
      copiesToCleanup.emplace_back(iter.first->second, copy);
   }

   void emit(DeadEndBlocks &deadEndBlocks) &&;
};

} // end anonymous namespace

void PhiNodeCopyCleanupInserter::emit(DeadEndBlocks &deadEndBlocks) && {
   // READ THIS: We are being very careful here to avoid allowing for
   // non-determinism to enter here.
   //
   // 1. First we create a list of indices of our phi node data. Then we use a
   //    stable sort those indices into the order in which our phi node cleanups
   //    would be in if we compared just using incomingValues. We use a stable
   //    sort here to ensure that within the same "cohort" of values, our order
   //    is insertion order.
   //
   // 2. We go through the list of phiNodeCleanupStates in insertion order. We
   //    also maintain a set of already visited base values. When we visit the
   //    first phiNodeCleanupState for a specific phi, we process the phi
   //    then. This ensures that we always process the phis in insertion order as
   //    well.
   SmallVector<unsigned, 32> copiesToCleanupIndicesSorted;
   llvm::copy(indices(copiesToCleanup),
              std::back_inserter(copiesToCleanupIndicesSorted));

   stable_sort(copiesToCleanupIndicesSorted,
               [&](unsigned lhsIndex, unsigned rhsIndex) {
                  unsigned lhs = copiesToCleanup[lhsIndex].first;
                  unsigned rhs = copiesToCleanup[rhsIndex].first;
                  return lhs < rhs;
               });

   for (auto ii = copiesToCleanupIndicesSorted.begin(),
           ie = copiesToCleanupIndicesSorted.end();
        ii != ie;) {
      unsigned incomingValueIndex = copiesToCleanup[*ii].first;

      // First find the end of the values for which ii does not equal baseValue.
      auto rangeEnd = std::find_if_not(std::next(ii), ie, [&](unsigned index) {
         return incomingValueIndex == copiesToCleanup[index].first;
      });

      POLAR_DEFER {
         // Once we have finished processing, set ii to rangeEnd. This ensures that
         // the code below does not need to worry about updating the iterator.
         ii = rangeEnd;
      };

      PILValue incomingValue =
         std::next(incomingValues.begin(), incomingValueIndex)->first;
      CopyValueInst *phiCopy = copiesToCleanup[*ii].second;
      auto *insertPt = getNonPhiBlockIncomingValueDef(incomingValue, phiCopy);
      auto loc = RegularLocation::getAutoGeneratedLocation();

      // Before we do anything, see if we have a single cleanup state. In such a
      // case, we could have that we have a phi node as an incoming value and a
      // copy_value in that same block. In such a case, we want to just insert the
      // copy and continue. This means that
      // cleanupState.getNonPhiBlockIncomingValueDef() should always return a
      // non-null value in the code below.
      if (std::next(ii) == rangeEnd && isa<PILArgument>(incomingValue) &&
          !insertPt) {
         PILBasicBlock *phiBlock = phiCopy->getParent();
         PILBuilderWithScope builder(phiBlock->getTerminator());
         builder.createDestroyValue(loc, incomingValue);
         continue;
      }

      // Otherwise, we know that we have for this incomingValue, multiple
      // potential insert pts that we need to handle at the same time with our
      // lifetime query. Gather up those uses.
      SmallVector<PILInstruction *, 8> users;
      transform(llvm::make_range(ii, rangeEnd), std::back_inserter(users),
                [&](unsigned index) { return copiesToCleanup[index].second; });

      // Then lifetime extend our base over the copy_value.
      assert(lifetimeFrontier.empty());
      auto *def = getNonPhiBlockIncomingValueDef(incomingValue, phiCopy);
      assert(def && "Should never have a nullptr here since we handled all of "
                    "the single block cases earlier");
      ValueLifetimeAnalysis analysis(def, users);
      bool foundCriticalEdges = !analysis.computeFrontier(
         lifetimeFrontier, ValueLifetimeAnalysis::DontModifyCFG, &deadEndBlocks);
      (void)foundCriticalEdges;
      assert(!foundCriticalEdges);

      while (!lifetimeFrontier.empty()) {
         auto *insertPoint = lifetimeFrontier.pop_back_val();
         PILBuilderWithScope builder(insertPoint);
         builder.createDestroyValue(loc, incomingValue);
      }
   }
}

void AvailableValueAggregator::addHandOffCopyDestroysForPhis(
   PILInstruction *load, PILValue newVal) {
   assert(isa<LoadBorrowInst>(load) || isa<LoadInst>(load));

   SmallPtrSet<PILBasicBlock *, 8> visitedBlocks;
   SmallVector<PILBasicBlock *, 8> leakingBlocks;
   SmallVector<std::pair<PILBasicBlock *, PILValue>, 8> incomingValues;
   auto loc = RegularLocation::getAutoGeneratedLocation();

#ifndef NDEBUG
   LLVM_DEBUG(llvm::dbgs() << "Inserted Phis!\n");
   for (auto *phi : insertedPhiNodes) {
      LLVM_DEBUG(llvm::dbgs() << "Phi: " << *phi);
   }
#endif

   // Before we begin, identify the offset for all phis that are intermediate
   // phis inserted by the SSA updater. We are taking advantage of the fact that
   // the SSA updater just constructs the web without knowledge of ownership. So
   // if a phi node is only used by another phi node that we inserted, then we
   // have an intermediate phi node.
   //
   // TODO: There should be a better way of doing this than doing a copy + sort.
   SmallVector<PILPhiArgument *, 32> insertedPhiNodesSorted;
   llvm::copy(insertedPhiNodes, std::back_inserter(insertedPhiNodesSorted));
   llvm::sort(insertedPhiNodesSorted);

   SmallBitVector intermediatePhiOffsets(insertedPhiNodes.size());
   for (unsigned i : indices(insertedPhiNodes)) {
      if (TermInst *termInst =
             insertedPhiNodes[i]->getSingleUserOfType<TermInst>()) {
         // Only set the value if we find termInst has a successor with a phi node
         // in our insertedPhiNodes.
         if (terminatorHasAnyKnownPhis(termInst, insertedPhiNodesSorted)) {
            intermediatePhiOffsets.set(i);
         }
      }
   }

   // First go through all of our phi nodes doing the following:
   //
   // 1. If any of the phi node have a copy_value as an operand, we know that the
   //    copy_value does not dominate our final definition since otherwise the
   //    SSA updater would not have inserted a phi node here. In such a case
   //    since we may not have that the copy_value is post-dominated by the phi,
   //    we need to insert a copy_value at the phi to allow for post-domination
   //    and then use the ValueLifetimeChecker to determine the rest of the
   //    frontier for the base value.
   //
   // 2. If our phi node is used by another phi node, we run into a similar
   //    problem where we could have that our original phi node does not dominate
   //    our final definition (since the SSA updater would not have inserted the
   //    phi) and may not be strongly control dependent on our phi. To work
   //    around this problem, we insert at the phi a copy_value to allow for the
   //    phi to post_dominate its copy and then extend the lifetime of the phied
   //    value over that copy.
   //
   // As an extra complication to this, when we insert compensating releases for
   // any copy_values from (1), we need to insert the destroy_value on "base
   // values" (either a copy_value or the first instruction of a phi argument's
   // block) /after/ we have found all of the base_values to ensure that if the
   // same base value is used by multiple phis, we do not insert too many destroy
   // value.
   //
   // NOTE: At first glance one may think that such a problem could not occur
   // with phi nodes as well. Sadly if we allow for double backedge loops, it is
   // possible (there may be more cases).
   PhiNodeCopyCleanupInserter cleanupInserter;

   for (unsigned i : indices(insertedPhiNodes)) {
      auto *phi = insertedPhiNodes[i];

      // If our phi is not owned, continue. No fixes are needed.
      if (phi->getOwnershipKind() != ValueOwnershipKind::Owned)
         continue;

      LLVM_DEBUG(llvm::dbgs() << "Visiting inserted phi: " << *phi);
      // Otherwise, we have a copy_value that may not be strongly control
      // equivalent with our phi node. In such a case, we need to use
      // ValueLifetimeAnalysis to lifetime extend the copy such that we can
      // produce a new copy_value at the phi. We insert destroys along the
      // frontier.
      visitedBlocks.clear();
      leakingBlocks.clear();
      incomingValues.clear();

      phi->getIncomingPhiValues(incomingValues);
      unsigned phiIndex = phi->getIndex();
      for (auto pair : incomingValues) {
         PILValue value = pair.second;

         // If we had a non-trivial type with non-owned ownership, we will not see
         // a copy_value, so skip them here.
         if (value.getOwnershipKind() != ValueOwnershipKind::Owned)
            continue;

         // Otherwise, value should be from a copy_value or a phi node.
         assert(isa<CopyValueInst>(value) || isa<PILPhiArgument>(value));

         // If we have a copy_value, remove it from the inserted insts set so we
         // skip it when we start processing insertedInstrs.
         if (auto *cvi = dyn_cast<CopyValueInst>(value)) {
            copyValueProcessedWithPhiNodes.insert(cvi);

            // Then check if our termInst is in the same block as our copy_value. In
            // such a case, we can just use the copy_value as our phi's value
            // without needing to worry about any issues around control equivalence.
            if (pair.first == cvi->getParent())
               continue;
         } else {
            assert(isa<PILPhiArgument>(value));
         }

         // Otherwise, insert a copy_value instruction right before the phi. We use
         // that for our actual phi.
         auto *termInst = pair.first->getTerminator();
         PILBuilderWithScope builder(termInst);
         CopyValueInst *phiCopy = builder.createCopyValue(loc, value);
         termInst->setOperand(phiIndex, phiCopy);

         // Now that we know our base, phi, phiCopy for this specific incoming
         // value, append it to the phiNodeClenaupState so we can insert
         // destroy_values late after we visit all insertedPhiNodes.
         cleanupInserter.trackNewCleanup(value, phiCopy);
      }

      // Then see if our phi is an intermediate phi. If it is an intermediate phi,
      // we know that this is not the phi node that is post-dominated by the
      // load_borrow and that we will lifetime extend it via the child
      // phi. Instead, we need to just ensure that our phi arg does not leak onto
      // its set of post-dominating paths, subtracting from that set the path
      // through our terminator use.
      if (intermediatePhiOffsets[i]) {
         continue;
      }

      // If we reach this point, then we know that we are a phi node that actually
      // dominates our user so we need to lifetime extend it over the
      // load_borrow. Thus insert copy_value along the incoming edges and then
      // lifetime extend the phi node over the load_borrow.
      //
      // The linear lifetime checker doesn't care if the passed in load is
      // actually a user of our copy_value. What we care about is that the load is
      // guaranteed to be in the block where we have reformed the tuple in a
      // consuming manner. This means if we add it as the consuming use of the
      // copy, we can find the leaking places if any exist.
      //
      // Then perform the linear lifetime check. If we succeed, continue. We have
      // no further work to do.
      auto errorKind = ownership::ErrorBehaviorKind::ReturnFalse;
      LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks);
      auto error = checker.checkValue(
         phi, {BranchPropagatedUser(&load->getAllOperands()[0])}, {}, errorKind,
         &leakingBlocks);

      if (!error.getFoundError()) {
         // If we did not find an error, then our copy_value must be strongly
         // control equivalent as our load_borrow. So just insert a destroy_value
         // for the copy_value.
         auto next = std::next(load->getIterator());
         PILBuilderWithScope builder(next);
         builder.emitDestroyValueOperation(next->getLoc(), phi);
         continue;
      }

      // Ok, we found some leaking blocks and potentially a loop. If we do not
      // find a loop, insert the destroy_value after the load_borrow. We do not do
      // this if we found a loop since our leaking blocks will lifetime extend the
      // value over the loop.
      if (!error.getFoundOverConsume()) {
         auto next = std::next(load->getIterator());
         PILBuilderWithScope builder(next);
         builder.emitDestroyValueOperation(next->getLoc(), phi);
      }

      // Ok, we found some leaking blocks. Insert destroys at the beginning of
      // these blocks for our copy_value.
      for (auto *bb : leakingBlocks) {
         PILBuilderWithScope b(bb->begin());
         b.emitDestroyValueOperation(loc, phi);
      }
   }

   // Alright! In summary, we just lifetime extended all of our phis,
   // lifetime extended them to the load block, and inserted phi copies
   // at all of our intermediate phi nodes. Now we need to cleanup and
   // insert all of the compensating destroy_value that we need.
   std::move(cleanupInserter).emit(deadEndBlocks);

   // Clear the phi node array now that we are done.
   insertedPhiNodes.clear();
}

void AvailableValueAggregator::addMissingDestroysForCopiedValues(
   PILInstruction *load, PILValue newVal) {
   assert(B.hasOwnership() &&
          "We assume this is only called if we have ownership");

   SmallPtrSet<PILBasicBlock *, 8> visitedBlocks;
   SmallVector<PILBasicBlock *, 8> leakingBlocks;
   auto loc = RegularLocation::getAutoGeneratedLocation();

   for (auto *inst : insertedInsts) {
      // Otherwise, see if this is a load [copy]. It if it a load [copy], then we
      // know that the load [copy] must be in the load block meaing we can just
      // put a destroy_value /after/ the load_borrow to ensure that the value
      // lives long enough for us to copy_value it or a derived value for the
      // begin_borrow.
      if (auto *li = dyn_cast<LoadInst>(inst)) {
         if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Copy) {
            assert(li->getParent() == load->getParent());
            auto next = std::next(load->getIterator());
            PILBuilderWithScope builder(next);
            builder.emitDestroyValueOperation(next->getLoc(), li);
            continue;
         }
      }

      // Our copy_value may have been unset above if it was used by a phi
      // (implying it does not dominate our final user).
      auto *cvi = dyn_cast<CopyValueInst>(inst);
      if (!cvi)
         continue;

      // If we already handled this copy_value above when handling phi nodes, just
      // continue.
      if (copyValueProcessedWithPhiNodes.count(cvi))
         continue;

      // Clear our state.
      visitedBlocks.clear();
      leakingBlocks.clear();

      // The linear lifetime checker doesn't care if the passed in load is
      // actually a user of our copy_value. What we care about is that the load is
      // guaranteed to be in the block where we have reformed the tuple in a
      // consuming manner. This means if we add it as the consuming use of the
      // copy, we can find the leaking places if any exist.
      //
      // Then perform the linear lifetime check. If we succeed, continue. We have
      // no further work to do.
      auto errorKind = ownership::ErrorBehaviorKind::ReturnFalse;
      LinearLifetimeChecker checker(visitedBlocks, deadEndBlocks);
      auto error = checker.checkValue(
         cvi, {BranchPropagatedUser(&load->getAllOperands()[0])}, {}, errorKind,
         &leakingBlocks);

      if (!error.getFoundError()) {
         // If we did not find an error, then our copy_value must be strongly
         // control equivalent as our load_borrow. So just insert a destroy_value
         // for the copy_value.
         auto next = std::next(load->getIterator());
         PILBuilderWithScope builder(next);
         builder.emitDestroyValueOperation(next->getLoc(), cvi);
         continue;
      }

      // Ok, we found some leaking blocks and potentially a loop. If we do not
      // find a loop, insert the destroy_value after the load_borrow. We do not do
      // this if we found a loop since our leaking blocks will lifetime extend the
      // value over the loop.
      if (!error.getFoundOverConsume()) {
         auto next = std::next(load->getIterator());
         PILBuilderWithScope builder(next);
         builder.emitDestroyValueOperation(next->getLoc(), cvi);
      }

      // Ok, we found some leaking blocks. Insert destroys at the beginning of
      // these blocks for our copy_value.
      for (auto *bb : leakingBlocks) {
         PILBuilderWithScope b(bb->begin());
         b.emitDestroyValueOperation(loc, cvi);
      }
   }
}

//===----------------------------------------------------------------------===//
//                          Available Value Dataflow
//===----------------------------------------------------------------------===//

namespace {

/// Given a piece of memory, the memory's uses, and destroys perform a single
/// round of semi-optimistic backwards dataflow for each use. The result is the
/// set of available values that reach the specific use of the field in the
/// allocated object.
///
/// The general form of the algorithm is that in our constructor, we analyze our
/// uses and determine available values. Then users call computeAvailableValues
/// which looks backwards up the control flow graph for available values that we
/// can use.
///
/// NOTE: The reason why we say that the algorithm is semi-optimistic is that we
/// assume that all incoming elements into a loopheader will be the same. If we
/// find a conflict, we record it and fail.
class AvailableValueDataflowContext {
   /// The base memory we are performing dataflow upon.
   AllocationInst *TheMemory;

   /// The number of sub elements of our memory.
   unsigned NumMemorySubElements;

   /// The set of uses that we are tracking. This is only here so we can update
   /// when exploding copy_addr. It would be great if we did not have to store
   /// this.
   SmallVectorImpl<PMOMemoryUse> &Uses;

   /// The set of blocks with local definitions.
   ///
   /// We use this to determine if we should visit a block or look at a block's
   /// predecessors during dataflow for an available value.
   llvm::SmallPtrSet<PILBasicBlock *, 32> HasLocalDefinition;

   /// The set of blocks that have definitions which specifically "kill" the
   /// given value. If a block is in this set, there must be an instruction in
   /// LoadTakeUse whose parent is the block. This is just used to speed up
   /// computation.
   ///
   /// NOTE: These are not considered escapes.
   llvm::SmallPtrSet<PILBasicBlock *, 32> HasLocalKill;

   /// This is a set of load takes that we are tracking. HasLocalKill is the set
   /// of parent blocks of these instructions.
   llvm::SmallPtrSet<PILInstruction *, 8> LoadTakeUses;

   /// This is a map of uses that are not loads (i.e., they are Stores,
   /// InOutUses, and Escapes), to their entry in Uses.
   llvm::SmallDenseMap<PILInstruction *, unsigned, 16> NonLoadUses;

   /// Does this value escape anywhere in the function. We use this very
   /// conservatively.
   bool HasAnyEscape = false;

public:
   AvailableValueDataflowContext(AllocationInst *TheMemory,
                                 unsigned NumMemorySubElements,
                                 SmallVectorImpl<PMOMemoryUse> &Uses);

   /// Try to compute available values for "TheMemory" at the instruction \p
   /// StartingFrom. We only compute the values for set bits in \p
   /// RequiredElts. We return the vailable values in \p Result. If any available
   /// values were found, return true. Otherwise, return false.
   bool computeAvailableValues(PILInstruction *StartingFrom,
                               unsigned FirstEltOffset,
                               unsigned NumLoadSubElements,
                               SmallBitVector &RequiredElts,
                               SmallVectorImpl<AvailableValue> &Result);

   /// Return true if the box has escaped at the specified instruction.  We are
   /// not
   /// allowed to do load promotion in an escape region.
   bool hasEscapedAt(PILInstruction *I);

   /// Explode a copy_addr, updating the Uses at the same time.
   void explodeCopyAddr(CopyAddrInst *CAI);

private:
   PILModule &getModule() const { return TheMemory->getModule(); }

   void updateAvailableValues(PILInstruction *Inst,
                              SmallBitVector &RequiredElts,
                              SmallVectorImpl<AvailableValue> &Result,
                              SmallBitVector &ConflictingValues);
   void computeAvailableValuesFrom(
      PILBasicBlock::iterator StartingFrom, PILBasicBlock *BB,
      SmallBitVector &RequiredElts,
      SmallVectorImpl<AvailableValue> &Result,
      llvm::SmallDenseMap<PILBasicBlock *, SmallBitVector, 32>
      &VisitedBlocks,
      SmallBitVector &ConflictingValues);
};

} // end anonymous namespace

AvailableValueDataflowContext::AvailableValueDataflowContext(
   AllocationInst *InputTheMemory, unsigned NumMemorySubElements,
   SmallVectorImpl<PMOMemoryUse> &InputUses)
   : TheMemory(InputTheMemory), NumMemorySubElements(NumMemorySubElements),
     Uses(InputUses) {
   // The first step of processing an element is to collect information about the
   // element into data structures we use later.
   for (unsigned ui : indices(Uses)) {
      auto &Use = Uses[ui];
      assert(Use.Inst && "No instruction identified?");

      // If we have a load...
      if (Use.Kind == PMOUseKind::Load) {
         // Skip load borrow use and open_existential_addr.
         if (isa<LoadBorrowInst>(Use.Inst) || isa<OpenExistentialAddrInst>(Use.Inst))
            continue;

         // That is not a load take, continue. Otherwise, stash the load [take].
         if (auto *LI = dyn_cast<LoadInst>(Use.Inst)) {
            if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
               LoadTakeUses.insert(LI);
               HasLocalKill.insert(LI->getParent());
            }
            continue;
         }

         // If we have a copy_addr as our load, it means we are processing a source
         // of the value. If the copy_addr is taking from the source, we need to
         // treat it like a load take use.
         if (auto *CAI = dyn_cast<CopyAddrInst>(Use.Inst)) {
            if (CAI->isTakeOfSrc() == IsTake) {
               LoadTakeUses.insert(CAI);
               HasLocalKill.insert(CAI->getParent());
            }
            continue;
         }

         llvm_unreachable("Unhandled PILInstructionKind for PMOUseKind::Load?!");
      }

      // Keep track of all the uses that aren't loads.
      NonLoadUses[Use.Inst] = ui;
      HasLocalDefinition.insert(Use.Inst->getParent());

      if (Use.Kind == PMOUseKind::Escape) {
         // Determine which blocks the value can escape from.  We aren't allowed to
         // promote loads in blocks reachable from an escape point.
         HasAnyEscape = true;
      }
   }

   // If isn't really a use, but we account for the alloc_box/mark_uninitialized
   // as a use so we see it in our dataflow walks.
   NonLoadUses[TheMemory] = ~0U;
   HasLocalDefinition.insert(TheMemory->getParent());
}

// This function takes in the current (potentially uninitialized) available
// values for theMemory and for the subset of AvailableValues corresponding to
// \p address either:
//
// 1. If uninitialized, optionally initialize the available value with a new
//    PILValue. It is optional since in certain cases, (for instance when
//    invalidating one just wants to skip empty available values).
//
// 2. Given an initialized value, either add the given instruction as an
//    insertion point or state that we have a conflict.
static inline void updateAvailableValuesHelper(
   SingleValueInstruction *theMemory, PILInstruction *inst, PILValue address,
   SmallBitVector &requiredElts, SmallVectorImpl<AvailableValue> &result,
   SmallBitVector &conflictingValues,
   function_ref<Optional<AvailableValue>(unsigned)> defaultFunc,
   function_ref<bool(AvailableValue &, unsigned)> isSafeFunc) {
   auto &mod = theMemory->getModule();
   unsigned startSubElt = computeSubelement(address, theMemory);

   // TODO: Is this needed now?
   assert(startSubElt != ~0U && "Store within enum projection not handled");
   for (unsigned i : range(getNumSubElements(
      address->getType().getObjectType(), mod,
      TypeExpansionContext(*theMemory->getFunction())))) {
      // If this element is not required, don't fill it in.
      if (!requiredElts[startSubElt + i])
         continue;

      // At this point we know that we will either mark the value as conflicting
      // or give it a value.
      requiredElts[startSubElt + i] = false;

      // First see if we have an entry at all.
      auto &entry = result[startSubElt + i];

      // If we don't...
      if (!entry) {
         // and we are told to initialize it, do so.
         if (auto defaultValue = defaultFunc(i)) {
            entry = std::move(defaultValue.getValue());
         } else {
            // Otherwise, mark this as a conflicting value. There is some available
            // value here, we just do not know what it is at this point. This
            // ensures that if we visit a kill where we do not have an entry yet, we
            // properly invalidate our state.
            conflictingValues[startSubElt + i] = true;
         }
         continue;
      }

      // Check if our caller thinks that the value currently in entry is
      // compatible with \p inst. If not, mark the values as conflicting and
      // continue.
      if (!isSafeFunc(entry, i)) {
         conflictingValues[startSubElt + i] = true;
         continue;
      }

      // Otherwise, we found another insertion point for our available
      // value. Today this will always be a Store.
      entry.addInsertionPoint(cast<StoreInst>(inst));
   }
}

void AvailableValueDataflowContext::updateAvailableValues(
   PILInstruction *Inst, SmallBitVector &RequiredElts,
   SmallVectorImpl<AvailableValue> &Result,
   SmallBitVector &ConflictingValues) {

   // If we are visiting a load [take], it invalidates the underlying available
   // values.
   //
   // NOTE: Since we are always looking back from the instruction to promote,
   // when we attempt to promote the load [take] itself, we will never hit this
   // code since.
   if (auto *LI = dyn_cast<LoadInst>(Inst)) {
      // First see if this is a load inst that we are tracking.
      if (LoadTakeUses.count(LI)) {
         updateAvailableValuesHelper(TheMemory, LI, LI->getOperand(), RequiredElts,
                                     Result, ConflictingValues,
            /*default*/
                                     [](unsigned) -> Optional<AvailableValue> {
                                        // We never initialize values. We only
                                        // want to invalidate.
                                        return None;
                                     },
            /*isSafe*/
                                     [](AvailableValue &, unsigned) -> bool {
                                        // Always assume values conflict.
                                        return false;
                                     });
         return;
      }
   }

   // Handle store.
   if (auto *SI = dyn_cast<StoreInst>(Inst)) {
      updateAvailableValuesHelper(
         TheMemory, SI, SI->getDest(), RequiredElts, Result, ConflictingValues,
         /*default*/
         [&](unsigned ResultOffset) -> Optional<AvailableValue> {
            Optional<AvailableValue> Result;
            Result.emplace(SI->getSrc(), ResultOffset, SI);
            return Result;
         },
         /*isSafe*/
         [&](AvailableValue &Entry, unsigned ResultOffset) -> bool {
            // TODO: This is /really/, /really/, conservative. This basically
            // means that if we do not have an identical store, we will not
            // promote.
            return Entry.getValue() == SI->getSrc() &&
                   Entry.getSubElementNumber() == ResultOffset;
         });
      return;
   }

   // If we got here from an apply, we must either be initializing the element
   // via an @out parameter or we are trying to model an invalidating load of the
   // value (e.x.: indirect_in, indirect_inout).

   // If we get here with a copy_addr, we must either be storing into the element
   // or tracking some sort of take of the src. First check if we are taking (in
   // which case, we just track invalidation of src) and continue. Otherwise we
   // must be storing into the copy_addr so see which loaded subelements are
   // being used, and if so, explode the copy_addr to its individual pieces.
   if (auto *CAI = dyn_cast<CopyAddrInst>(Inst)) {
      // If we have a load take use, we must be tracking a store of CAI.
      if (LoadTakeUses.count(CAI)) {
         updateAvailableValuesHelper(TheMemory, CAI, CAI->getSrc(), RequiredElts,
                                     Result, ConflictingValues,
            /*default*/
                                     [](unsigned) -> Optional<AvailableValue> {
                                        // We never give values default initialized
                                        // values. We only want to invalidate.
                                        return None;
                                     },
            /*isSafe*/
                                     [](AvailableValue &, unsigned) -> bool {
                                        // Always assume values conflict.
                                        return false;
                                     });
         return;
      }

      unsigned StartSubElt = computeSubelement(CAI->getDest(), TheMemory);
      assert(StartSubElt != ~0U && "Store within enum projection not handled");
      PILType ValTy = CAI->getDest()->getType();

      bool AnyRequired = false;
      for (unsigned i : range(getNumSubElements(
         ValTy, getModule(), TypeExpansionContext(*CAI->getFunction())))) {
         // If this element is not required, don't fill it in.
         AnyRequired = RequiredElts[StartSubElt+i];
         if (AnyRequired) break;
      }

      // If this is a copy addr that doesn't intersect the loaded subelements,
      // just continue with an unmodified load mask.
      if (!AnyRequired)
         return;

      // If the copyaddr is of a non-loadable type, we can't promote it.  Just
      // consider it to be a clobber.
      if (CAI->getSrc()->getType().isLoadable(*CAI->getFunction())) {
         // Otherwise, some part of the copy_addr's value is demanded by a load, so
         // we need to explode it to its component pieces.  This only expands one
         // level of the copyaddr.
         explodeCopyAddr(CAI);

         // The copy_addr doesn't provide any values, but we've arranged for our
         // iterators to visit the newly generated instructions, which do.
         return;
      }
   }

   // TODO: inout apply's should only clobber pieces passed in.

   // Otherwise, this is some unknown instruction, conservatively assume that all
   // values are clobbered.
   RequiredElts.clear();
   ConflictingValues = SmallBitVector(Result.size(), true);
   return;
}

bool AvailableValueDataflowContext::computeAvailableValues(
   PILInstruction *StartingFrom, unsigned FirstEltOffset,
   unsigned NumLoadSubElements, SmallBitVector &RequiredElts,
   SmallVectorImpl<AvailableValue> &Result) {
   llvm::SmallDenseMap<PILBasicBlock*, SmallBitVector, 32> VisitedBlocks;
   SmallBitVector ConflictingValues(Result.size());

   computeAvailableValuesFrom(StartingFrom->getIterator(),
                              StartingFrom->getParent(), RequiredElts, Result,
                              VisitedBlocks, ConflictingValues);
   // If there are no values available at this load point, then we fail to
   // promote this load and there is nothing to do.
   SmallBitVector AvailableValueIsPresent(NumMemorySubElements);

   for (unsigned i :
      range(FirstEltOffset, FirstEltOffset + NumLoadSubElements)) {
      AvailableValueIsPresent[i] = Result[i].getValue();
   }

   // If we do not have any values available, bail.
   if (AvailableValueIsPresent.none())
      return false;

   // Otherwise, if we have any conflicting values, explicitly mask them out of
   // the result, so we don't pick one arbitrary available value.
   if (ConflictingValues.none()) {
      return true;
   }

   // At this point, we know that we have /some/ conflicting values and some
   // available values.
   if (AvailableValueIsPresent.reset(ConflictingValues).none())
      return false;

   // Otherwise, mask out the available values and return true. We have at least
   // 1 available value.
   int NextIter = ConflictingValues.find_first();
   while (NextIter != -1) {
      assert(NextIter >= 0 && "Int can not be represented?!");
      unsigned Iter = NextIter;
      Result[Iter] = {};
      NextIter = ConflictingValues.find_next(Iter);
   }

   return true;
}

void AvailableValueDataflowContext::computeAvailableValuesFrom(
   PILBasicBlock::iterator StartingFrom, PILBasicBlock *BB,
   SmallBitVector &RequiredElts, SmallVectorImpl<AvailableValue> &Result,
   llvm::SmallDenseMap<PILBasicBlock *, SmallBitVector, 32>
   &VisitedBlocks,
   SmallBitVector &ConflictingValues) {
   assert(!RequiredElts.none() && "Scanning with a goal of finding nothing?");

   // If there is a potential modification in the current block, scan the block
   // to see if the store, escape, or load [take] is before or after the load. If
   // it is before, check to see if it produces the value we are looking for.
   bool shouldCheckBlock =
      HasLocalDefinition.count(BB) || HasLocalKill.count(BB);
   if (shouldCheckBlock) {
      for (PILBasicBlock::iterator BBI = StartingFrom; BBI != BB->begin();) {
         PILInstruction *TheInst = &*std::prev(BBI);
         // If this instruction is unrelated to the element, ignore it.
         if (!NonLoadUses.count(TheInst) && !LoadTakeUses.count(TheInst)) {
            --BBI;
            continue;
         }

         // Given an interesting instruction, incorporate it into the set of
         // results, and filter down the list of demanded subelements that we still
         // need.
         updateAvailableValues(TheInst, RequiredElts, Result, ConflictingValues);

         // If this satisfied all of the demanded values, we're done.
         if (RequiredElts.none())
            return;

         // Otherwise, keep scanning the block.  If the instruction we were looking
         // at just got exploded, don't skip the next instruction.
         if (&*std::prev(BBI) == TheInst)
            --BBI;
      }
   }

   // Otherwise, we need to scan up the CFG looking for available values.
   for (auto PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) {
      PILBasicBlock *PredBB = *PI;

      // If the predecessor block has already been visited (potentially due to a
      // cycle in the CFG), don't revisit it.  We can do this safely because we
      // are optimistically assuming that all incoming elements in a cycle will be
      // the same.  If we ever detect a conflicting element, we record it and do
      // not look at the result.
      auto Entry = VisitedBlocks.insert({PredBB, RequiredElts});
      if (!Entry.second) {
         // If we are revisiting a block and asking for different required elements
         // then anything that isn't agreeing is in conflict.
         const auto &PrevRequired = Entry.first->second;
         if (PrevRequired != RequiredElts) {
            ConflictingValues |= (PrevRequired ^ RequiredElts);

            RequiredElts &= ~ConflictingValues;
            if (RequiredElts.none())
               return;
         }
         continue;
      }

      // Make sure to pass in the same set of required elements for each pred.
      SmallBitVector Elts = RequiredElts;
      computeAvailableValuesFrom(PredBB->end(), PredBB, Elts, Result,
                                 VisitedBlocks, ConflictingValues);

      // If we have any conflicting values, don't bother searching for them.
      RequiredElts &= ~ConflictingValues;
      if (RequiredElts.none())
         return;
   }
}

/// Explode a copy_addr instruction of a loadable type into lower level
/// operations like loads, stores, retains, releases, retain_value, etc.
void AvailableValueDataflowContext::explodeCopyAddr(CopyAddrInst *CAI) {
   LLVM_DEBUG(llvm::dbgs() << "  -- Exploding copy_addr: " << *CAI << "\n");

   PILType ValTy = CAI->getDest()->getType().getObjectType();

   PILFunction *F = CAI->getFunction();
   auto &TL = F->getTypeLowering(ValTy);

   // Keep track of the new instructions emitted.
   SmallVector<PILInstruction *, 4> NewInsts;
   PILBuilder B(CAI, &NewInsts);
   B.setCurrentDebugScope(CAI->getDebugScope());

   // Use type lowering to lower the copyaddr into a load sequence + store
   // sequence appropriate for the type.
   PILValue StoredValue =
      TL.emitLoadOfCopy(B, CAI->getLoc(), CAI->getSrc(), CAI->isTakeOfSrc());

   TL.emitStoreOfCopy(B, CAI->getLoc(), StoredValue, CAI->getDest(),
                      CAI->isInitializationOfDest());

   // Update our internal state for this being gone.
   NonLoadUses.erase(CAI);
   LoadTakeUses.erase(CAI);
   // NOTE: We do not need to update HasLocalKill since the copy_addr
   // and the loads/stores will have the same parent block.

   // Remove the copy_addr from Uses.  A single copy_addr can appear multiple
   // times if the source and dest are to elements within a single aggregate, but
   // we only want to pick up the CopyAddrKind from the store.
   PMOMemoryUse LoadUse, StoreUse;
   for (auto &Use : Uses) {
      if (Use.Inst != CAI)
         continue;

      if (Use.Kind == PMOUseKind::Load) {
         assert(LoadUse.isInvalid());
         LoadUse = Use;
      } else {
         assert(StoreUse.isInvalid());
         StoreUse = Use;
      }

      Use.Inst = nullptr;

      // Keep scanning in case the copy_addr appears multiple times.
   }

   assert((LoadUse.isValid() || StoreUse.isValid()) &&
          "we should have a load or a store, possibly both");
   assert(StoreUse.isInvalid() || StoreUse.Kind == Assign ||
          StoreUse.Kind == Initialization || StoreUse.Kind == InitOrAssign);

   // Now that we've emitted a bunch of instructions, including a load and store
   // but also including other stuff, update the internal state of
   // LifetimeChecker to reflect them.

   // Update the instructions that touch the memory.  NewInst can grow as this
   // iterates, so we can't use a foreach loop.
   for (auto *NewInst : NewInsts) {
      switch (NewInst->getKind()) {
         default:
            NewInst->dump();
            llvm_unreachable("Unknown instruction generated by copy_addr lowering");

         case PILInstructionKind::StoreInst:
            // If it is a store to the memory object (as oppose to a store to
            // something else), track it as an access.
            if (StoreUse.isValid()) {
               StoreUse.Inst = NewInst;
               // If our store use by the copy_addr is an assign, then we know that
               // before we store the new value, we loaded the old value implying that
               // our store is technically initializing memory when it occurs. So
               // change the kind to Initialization.
               if (StoreUse.Kind == Assign)
                  StoreUse.Kind = Initialization;
               NonLoadUses[NewInst] = Uses.size();
               Uses.push_back(StoreUse);
            }
            continue;

         case PILInstructionKind::LoadInst:
            // If it is a load from the memory object (as oppose to a load from
            // something else), track it as an access.  We need to explicitly check to
            // see if the load accesses "TheMemory" because it could either be a load
            // for the copy_addr source, or it could be a load corresponding to the
            // "assign" operation on the destination of the copyaddr.
            if (LoadUse.isValid() &&
                getAccessPathRoot(NewInst->getOperand(0)) == TheMemory) {
               if (auto *LI = dyn_cast<LoadInst>(NewInst)) {
                  if (LI->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
                     LoadTakeUses.insert(LI);
                     HasLocalKill.insert(LI->getParent());
                  }
               }
               LoadUse.Inst = NewInst;
               Uses.push_back(LoadUse);
            }
            continue;

         case PILInstructionKind::RetainValueInst:
         case PILInstructionKind::StrongRetainInst:
         case PILInstructionKind::StrongReleaseInst:
         case PILInstructionKind::ReleaseValueInst: // Destroy overwritten value
            // These are ignored.
            continue;
      }
   }

   // Next, remove the copy_addr itself.
   CAI->eraseFromParent();
}

bool AvailableValueDataflowContext::hasEscapedAt(PILInstruction *I) {
   // Return true if the box has escaped at the specified instruction.  We are
   // not allowed to do load promotion in an escape region.

   // FIXME: This is not an aggressive implementation.  :)

   // TODO: At some point, we should special case closures that just *read* from
   // the escaped value (by looking at the body of the closure).  They should not
   // prevent load promotion, and will allow promoting values like X in regions
   // dominated by "... && X != 0".
   return HasAnyEscape;
}

//===----------------------------------------------------------------------===//
//                          Allocation Optimization
//===----------------------------------------------------------------------===//

static PILType getMemoryType(AllocationInst *memory) {
   // Compute the type of the memory object.
   if (auto *abi = dyn_cast<AllocBoxInst>(memory)) {
      assert(abi->getBoxType()->getLayout()->getFields().size() == 1 &&
             "optimizing multi-field boxes not implemented");
      return getPILBoxFieldType(TypeExpansionContext(*abi->getFunction()),
                                abi->getBoxType(), abi->getModule().Types, 0);
   }

   assert(isa<AllocStackInst>(memory));
   return cast<AllocStackInst>(memory)->getElementType();
}

namespace {

/// This performs load promotion and deletes synthesized allocations if all
/// loads can be removed.
class AllocOptimize {

   PILModule &Module;

   /// This is either an alloc_box or alloc_stack instruction.
   AllocationInst *TheMemory;

   /// This is the PILType of the memory object.
   PILType MemoryType;

   /// The number of primitive subelements across all elements of this memory
   /// value.
   unsigned NumMemorySubElements;

   SmallVectorImpl<PMOMemoryUse> &Uses;
   SmallVectorImpl<PILInstruction *> &Releases;

   DeadEndBlocks &deadEndBlocks;

   /// A structure that we use to compute our available values.
   AvailableValueDataflowContext DataflowContext;

public:
   AllocOptimize(AllocationInst *memory, SmallVectorImpl<PMOMemoryUse> &uses,
                 SmallVectorImpl<PILInstruction *> &releases,
                 DeadEndBlocks &deadEndBlocks)
      : Module(memory->getModule()), TheMemory(memory),
        MemoryType(getMemoryType(memory)),
        NumMemorySubElements(getNumSubElements(
           MemoryType, Module, TypeExpansionContext(*memory->getFunction()))),
        Uses(uses), Releases(releases), deadEndBlocks(deadEndBlocks),
        DataflowContext(TheMemory, NumMemorySubElements, uses) {}

   bool optimizeMemoryAccesses();

   /// If the allocation is an autogenerated allocation that is only stored to
   /// (after load promotion) then remove it completely.
   bool tryToRemoveDeadAllocation();

private:
   Optional<std::pair<PILType, unsigned>>
   computeAvailableValues(PILValue SrcAddr, PILInstruction *Inst,
                          SmallVectorImpl<AvailableValue> &AvailableValues);
   bool promoteLoadCopy(LoadInst *li);
   bool promoteLoadBorrow(LoadBorrowInst *lbi);
   bool promoteCopyAddr(CopyAddrInst *cai);
   void promoteLoadTake(LoadInst *Inst, MutableArrayRef<AvailableValue> values);
   void promoteDestroyAddr(DestroyAddrInst *dai,
                           MutableArrayRef<AvailableValue> values);
   bool canPromoteTake(PILInstruction *i,
                       SmallVectorImpl<AvailableValue> &availableValues);
};

} // end anonymous namespace

Optional<std::pair<PILType, unsigned>> AllocOptimize::computeAvailableValues(
   PILValue SrcAddr, PILInstruction *Inst,
   SmallVectorImpl<AvailableValue> &AvailableValues) {
   // If the box has escaped at this instruction, we can't safely promote the
   // load.
   if (DataflowContext.hasEscapedAt(Inst))
      return None;

   PILType LoadTy = SrcAddr->getType().getObjectType();

   // If this is a load/copy_addr from a struct field that we want to promote,
   // compute the access path down to the field so we can determine precise
   // def/use behavior.
   unsigned FirstElt = computeSubelement(SrcAddr, TheMemory);

   // If this is a load from within an enum projection, we can't promote it since
   // we don't track subelements in a type that could be changing.
   if (FirstElt == ~0U)
      return None;

   unsigned NumLoadSubElements = getNumSubElements(
      LoadTy, Module, TypeExpansionContext(*TheMemory->getFunction()));

   // Set up the bitvector of elements being demanded by the load.
   SmallBitVector RequiredElts(NumMemorySubElements);
   RequiredElts.set(FirstElt, FirstElt + NumLoadSubElements);

   AvailableValues.resize(NumMemorySubElements);

   // Find out if we have any available values.  If no bits are demanded, we
   // trivially succeed. This can happen when there is a load of an empty struct.
   if (NumLoadSubElements != 0 &&
       !DataflowContext.computeAvailableValues(
          Inst, FirstElt, NumLoadSubElements, RequiredElts, AvailableValues))
      return None;

   return std::make_pair(LoadTy, FirstElt);
}

/// If we are able to optimize \p Inst, return the source address that
/// instruction is loading from. If we can not optimize \p Inst, then just
/// return an empty PILValue.
static PILValue tryFindSrcAddrForLoad(PILInstruction *i) {
   // We can always promote a load_borrow.
   if (auto *lbi = dyn_cast<LoadBorrowInst>(i))
      return lbi->getOperand();

   // We only handle load [copy], load [trivial], load and copy_addr right
   // now. Notably we do not support load [take] when promoting loads.
   if (auto *li = dyn_cast<LoadInst>(i))
      if (li->getOwnershipQualifier() != LoadOwnershipQualifier::Take)
         return li->getOperand();

   // If this is a CopyAddr, verify that the element type is loadable.  If not,
   // we can't explode to a load.
   auto *cai = dyn_cast<CopyAddrInst>(i);
   if (!cai || !cai->getSrc()->getType().isLoadable(*cai->getFunction()))
      return PILValue();
   return cai->getSrc();
}

/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis.  We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
bool AllocOptimize::promoteLoadCopy(LoadInst *li) {
   // Note that we intentionally don't support forwarding of weak pointers,
   // because the underlying value may drop be deallocated at any time.  We would
   // have to prove that something in this function is holding the weak value
   // live across the promoted region and that isn't desired for a stable
   // diagnostics pass this like one.

   // First attempt to find a source addr for our "load" instruction. If we fail
   // to find a valid value, just return.
   PILValue srcAddr = tryFindSrcAddrForLoad(li);
   if (!srcAddr)
      return false;

   SmallVector<AvailableValue, 8> availableValues;
   auto result = computeAvailableValues(srcAddr, li, availableValues);
   if (!result.hasValue())
      return false;

   PILType loadTy = result->first;
   unsigned firstElt = result->second;

   // Aggregate together all of the subelements into something that has the same
   // type as the load did, and emit smaller loads for any subelements that were
   // not available. We are "propagating" a +1 available value from the store
   // points.
   AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Copy);
   PILValue newVal = agg.aggregateValues(loadTy, li->getOperand(), firstElt);

   LLVM_DEBUG(llvm::dbgs() << "  *** Promoting load: " << *li);
   LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal);
   ++NumLoadPromoted;

   // If we did not have ownership, we did not insert extra copies at our stores,
   // so we can just RAUW and return.
   if (!li->getFunction()->hasOwnership()) {
      li->replaceAllUsesWith(newVal);
      PILValue addr = li->getOperand();
      li->eraseFromParent();
      if (auto *addrI = addr->getDefiningInstruction())
         recursivelyDeleteTriviallyDeadInstructions(addrI);
      return true;
   }

   // If we inserted any copies, we created the copies at our stores. We know
   // that in our load block, we will reform the aggregate as appropriate at the
   // load implying that the value /must/ be fully consumed. If we promoted a +0
   // value, we created dominating destroys along those paths. Thus any leaking
   // blocks that we may have can be found by performing a linear lifetime check
   // over all copies that we found using the load as the "consuming uses" (just
   // for the purposes of identifying the consuming block).
   agg.fixupOwnership(li, newVal);

   // Now that we have fixed up all of our missing destroys, insert the copy
   // value for our actual load and RAUW.
   newVal = PILBuilderWithScope(li).emitCopyValueOperation(li->getLoc(), newVal);

   li->replaceAllUsesWith(newVal);
   PILValue addr = li->getOperand();
   li->eraseFromParent();
   if (auto *addrI = addr->getDefiningInstruction())
      recursivelyDeleteTriviallyDeadInstructions(addrI);
   return true;
}

bool AllocOptimize::promoteCopyAddr(CopyAddrInst *cai) {
   // Note that we intentionally don't support forwarding of weak pointers,
   // because the underlying value may drop be deallocated at any time.  We would
   // have to prove that something in this function is holding the weak value
   // live across the promoted region and that isn't desired for a stable
   // diagnostics pass this like one.

   // First attempt to find a source addr for our "load" instruction. If we fail
   // to find a valid value, just return.
   PILValue srcAddr = tryFindSrcAddrForLoad(cai);
   if (!srcAddr)
      return false;

   SmallVector<AvailableValue, 8> availableValues;
   auto result = computeAvailableValues(srcAddr, cai, availableValues);
   if (!result.hasValue())
      return false;

   // Ok, we have some available values.  If we have a copy_addr, explode it now,
   // exposing the load operation within it.  Subsequent optimization passes will
   // see the load and propagate the available values into it.
   DataflowContext.explodeCopyAddr(cai);

   // This is removing the copy_addr, but explodeCopyAddr takes care of
   // removing the instruction from Uses for us, so we return false.
   return false;
}

/// At this point, we know that this element satisfies the definitive init
/// requirements, so we can try to promote loads to enable SSA-based dataflow
/// analysis.  We know that accesses to this element only access this element,
/// cross element accesses have been scalarized.
///
/// This returns true if the load has been removed from the program.
bool AllocOptimize::promoteLoadBorrow(LoadBorrowInst *lbi) {
   // Note that we intentionally don't support forwarding of weak pointers,
   // because the underlying value may drop be deallocated at any time.  We would
   // have to prove that something in this function is holding the weak value
   // live across the promoted region and that isn't desired for a stable
   // diagnostics pass this like one.

   // First attempt to find a source addr for our "load" instruction. If we fail
   // to find a valid value, just return.
   PILValue srcAddr = tryFindSrcAddrForLoad(lbi);
   if (!srcAddr)
      return false;

   SmallVector<AvailableValue, 8> availableValues;
   auto result = computeAvailableValues(srcAddr, lbi, availableValues);
   if (!result.hasValue())
      return false;

   ++NumLoadPromoted;

   PILType loadTy = result->first;
   unsigned firstElt = result->second;

   // Aggregate together all of the subelements into something that has the same
   // type as the load did, and emit smaller loads for any subelements that were
   // not available. We are "propagating" a +1 available value from the store
   // points.
   AvailableValueAggregator agg(lbi, availableValues, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Borrow);
   PILValue newVal = agg.aggregateValues(loadTy, lbi->getOperand(), firstElt);

   LLVM_DEBUG(llvm::dbgs() << "  *** Promoting load: " << *lbi);
   LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal);

   // If we inserted any copies, we created the copies at our
   // stores. We know that in our load block, we will reform the
   // aggregate as appropriate, will borrow the value there and give us
   // a whole pristine new value. Now in this routine, we go through
   // all of the copies and phis that we inserted and ensure that:
   //
   // 1. Phis are always strongly control equivalent to the copies that
   //    produced their incoming values.
   //
   // 2. All intermediate copies are properly lifetime extended to the
   //    load block and all leaking blocks are filled in as appropriate
   //    with destroy_values.
   agg.fixupOwnership(lbi, newVal);

   // Now that we have fixed up the lifetimes of all of our incoming copies so
   // that they are alive over the load point, copy, borrow newVal and insert
   // destroy_value after the end_borrow and then RAUW.
   PILBuilderWithScope builder(lbi);
   PILValue copiedVal = builder.emitCopyValueOperation(lbi->getLoc(), newVal);
   newVal = builder.createBeginBorrow(lbi->getLoc(), copiedVal);

   for (auto *ebi : lbi->getUsersOfType<EndBorrowInst>()) {
      auto next = std::next(ebi->getIterator());
      PILBuilderWithScope(next).emitDestroyValueOperation(ebi->getLoc(),
                                                          copiedVal);
   }

   lbi->replaceAllUsesWith(newVal);
   PILValue addr = lbi->getOperand();
   lbi->eraseFromParent();
   if (auto *addrI = addr->getDefiningInstruction())
      recursivelyDeleteTriviallyDeadInstructions(addrI);
   return true;
}

/// Return true if we can promote the given destroy.
bool AllocOptimize::canPromoteTake(
   PILInstruction *inst, SmallVectorImpl<AvailableValue> &availableValues) {
   PILValue address = inst->getOperand(0);

   // We cannot promote destroys of address-only types, because we can't expose
   // the load.
   PILType loadTy = address->getType().getObjectType();
   if (loadTy.isAddressOnly(*inst->getFunction()))
      return false;

   // If the box has escaped at this instruction, we can't safely promote the
   // load.
   if (DataflowContext.hasEscapedAt(inst))
      return false;

   // Compute the access path down to the field so we can determine precise
   // def/use behavior.
   unsigned firstElt = computeSubelement(address, TheMemory);
   assert(firstElt != ~0U && "destroy within enum projection is not valid");
   auto expansionContext = TypeExpansionContext(*inst->getFunction());
   unsigned numLoadSubElements =
      getNumSubElements(loadTy, Module, expansionContext);

   // Find out if we have any available values.  If no bits are demanded, we
   // trivially succeed. This can happen when there is a load of an empty struct.
   if (numLoadSubElements == 0)
      return true;

   // Set up the bitvector of elements being demanded by the load.
   SmallBitVector requiredElts(NumMemorySubElements);
   requiredElts.set(firstElt, firstElt + numLoadSubElements);

   // Compute our available values. If we do not have any available values,
   // return false. We have nothing further to do.
   SmallVector<AvailableValue, 8> tmpList;
   tmpList.resize(NumMemorySubElements);
   if (!DataflowContext.computeAvailableValues(
      inst, firstElt, numLoadSubElements, requiredElts, tmpList))
      return false;

   // Now check that we can perform a take upon our available values. This
   // implies today that our value is fully available. If the value is not fully
   // available, we would need to split stores to promote this destroy_addr. We
   // do not support that yet.
   AvailableValueAggregator agg(inst, tmpList, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Take);
   if (!agg.canTake(loadTy, firstElt))
      return false;

   // Ok, we can promote this destroy_addr... move the temporary lists contents
   // into the final AvailableValues list.
   std::move(tmpList.begin(), tmpList.end(),
             std::back_inserter(availableValues));

   return true;
}

// DestroyAddr is a composed operation merging load [take] + destroy_value.  If
// the implicit load's value is available, explode it.
//
// NOTE: We only do this if we have a fully available value.
//
// Note that we handle the general case of a destroy_addr of a piece of the
// memory object, not just destroy_addrs of the entire thing.
void AllocOptimize::promoteDestroyAddr(
   DestroyAddrInst *dai, MutableArrayRef<AvailableValue> availableValues) {
   PILValue address = dai->getOperand();
   PILType loadTy = address->getType().getObjectType();

   // Compute the access path down to the field so we can determine precise
   // def/use behavior.
   unsigned firstElt = computeSubelement(address, TheMemory);

   // Aggregate together all of the subelements into something that has the same
   // type as the load did, and emit smaller) loads for any subelements that were
   // not available.
   AvailableValueAggregator agg(dai, availableValues, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Take);
   PILValue newVal = agg.aggregateValues(loadTy, address, firstElt);

   ++NumDestroyAddrPromoted;

   LLVM_DEBUG(llvm::dbgs() << "  *** Promoting destroy_addr: " << *dai);
   LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal);

   PILBuilderWithScope(dai).emitDestroyValueOperation(dai->getLoc(), newVal);
   dai->eraseFromParent();
}

void AllocOptimize::promoteLoadTake(
   LoadInst *li, MutableArrayRef<AvailableValue> availableValues) {
   assert(li->getOwnershipQualifier() == LoadOwnershipQualifier::Take &&
          "load [copy], load [trivial], load should be handled by "
          "promoteLoadCopy");
   PILValue address = li->getOperand();
   PILType loadTy = address->getType().getObjectType();

   // Compute the access path down to the field so we can determine precise
   // def/use behavior.
   unsigned firstElt = computeSubelement(address, TheMemory);

   // Aggregate together all of the subelements into something that has the same
   // type as the load did, and emit smaller) loads for any subelements that were
   // not available.
   AvailableValueAggregator agg(li, availableValues, Uses, deadEndBlocks,
                                AvailableValueExpectedOwnership::Take);
   PILValue newVal = agg.aggregateValues(loadTy, address, firstElt);

   ++NumLoadTakePromoted;

   LLVM_DEBUG(llvm::dbgs() << "  *** Promoting load_take: " << *li);
   LLVM_DEBUG(llvm::dbgs() << "      To value: " << *newVal);

   // Then perform the RAUW.
   li->replaceAllUsesWith(newVal);
   li->eraseFromParent();
}

namespace {

struct TakePromotionState {
   ArrayRef<PILInstruction *> takeInsts;
   SmallVector<unsigned, 8> takeInstIndices;
   SmallVector<AvailableValue, 32> availableValueList;
   SmallVector<unsigned, 8> availableValueStartOffsets;

   TakePromotionState(ArrayRef<PILInstruction *> takeInsts)
      : takeInsts(takeInsts) {}

   unsigned size() const { return takeInstIndices.size(); }

   void initializeForTakeInst(unsigned takeInstIndex) {
      availableValueStartOffsets.push_back(availableValueList.size());
      takeInstIndices.push_back(takeInstIndex);
   }

   std::pair<PILInstruction *, MutableArrayRef<AvailableValue>>
   getData(unsigned index) {
      unsigned takeInstIndex = takeInstIndices[index];
      unsigned startOffset = availableValueStartOffsets[index];
      unsigned count;

      if ((availableValueStartOffsets.size() - 1) != index) {
         count = availableValueStartOffsets[index + 1] - startOffset;
      } else {
         count = availableValueList.size() - startOffset;
      }

      MutableArrayRef<AvailableValue> values(&availableValueList[startOffset],
                                             count);
      return {takeInsts[takeInstIndex], values};
   }
};

} // end anonymous namespace

// Check if our use list has any non store, non take uses that keep the value
// alive. Returns nullptr on success and the user that prevents removal on
// failure.
//
// NOTE: This also gathers up any takes that we need to process.
static PILInstruction *
checkForNonStoreNonTakeUses(ArrayRef<PMOMemoryUse> uses,
                            SmallVectorImpl<PILInstruction *> &loadTakeList) {
   for (auto &u : uses) {
      // Ignore removed instructions.
      if (u.Inst == nullptr)
         continue;

      switch (u.Kind) {
         case PMOUseKind::Assign:
            // Until we can promote the value being destroyed by the assign, we can
            // not remove deallocations with such assigns.
            return u.Inst;
         case PMOUseKind::InitOrAssign:
            continue; // These don't prevent removal.
         case PMOUseKind::Load:
            // For now only handle takes from alloc_stack.
            //
            // TODO: It should be implementable, but it has not been needed yet.
            if (auto *li = dyn_cast<LoadInst>(u.Inst)) {
               if (li->getOwnershipQualifier() == LoadOwnershipQualifier::Take) {
                  loadTakeList.push_back(li);
                  continue;
               }
            }
            return u.Inst;
         case PMOUseKind::Initialization:
            if (!isa<ApplyInst>(u.Inst) &&
                // A copy_addr that is not a take affects the retain count
                // of the source.
                (!isa<CopyAddrInst>(u.Inst) ||
                 cast<CopyAddrInst>(u.Inst)->isTakeOfSrc()))
               continue;
            // FALL THROUGH.
            LLVM_FALLTHROUGH;
         case PMOUseKind::IndirectIn:
         case PMOUseKind::InOutUse:
         case PMOUseKind::Escape:
            return u.Inst; // These do prevent removal.
      }
   }

   return nullptr;
}

// We don't want to remove allocations that are required for useful debug
// information at -O0.  As such, we only remove allocations if:
//
// 1. They are in a transparent function.
// 2. They are in a normal function, but didn't come from a VarDecl, or came
//    from one that was autogenerated or inlined from a transparent function.
static bool isRemovableAutogeneratedAllocation(AllocationInst *TheMemory) {
   PILLocation loc = TheMemory->getLoc();
   return TheMemory->getFunction()->isTransparent() ||
          !loc.getAsAstNode<VarDecl>() || loc.isAutoGenerated() ||
          loc.is<MandatoryInlinedLocation>();
}

bool AllocOptimize::tryToRemoveDeadAllocation() {
   assert((isa<AllocBoxInst>(TheMemory) || isa<AllocStackInst>(TheMemory)) &&
          "Unhandled allocation case");

   if (!isRemovableAutogeneratedAllocation(TheMemory))
      return false;

   SmallVector<PILInstruction *, 8> loadTakeList;
   // Check the uses list to see if there are any non-store uses left over after
   // load promotion and other things PMO does.
   if (auto *badUser = checkForNonStoreNonTakeUses(Uses, loadTakeList)) {
      LLVM_DEBUG(llvm::dbgs() << "*** Failed to remove autogenerated alloc: "
                                 "kept alive by: "
                              << *badUser);
      return false;
   }

   // If our memory is trivially typed, we can just remove it without needing to
   // consider if the stored value needs to be destroyed. So at this point,
   // delete the memory!
   if (MemoryType.isTrivial(*TheMemory->getFunction())) {
      LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated trivial allocation: "
                              << *TheMemory);

      // If it is safe to remove, do it.  Recursively remove all instructions
      // hanging off the allocation instruction, then return success.  Let the
      // caller remove the allocation itself to avoid iterator invalidation.
      eraseUsesOfInstruction(TheMemory);

      return true;
   }

   // Now make sure we can promote all load [take] and prepare state for each of
   // them.
   TakePromotionState loadTakeState(loadTakeList);
   for (auto p : llvm::enumerate(loadTakeList)) {
      loadTakeState.initializeForTakeInst(p.index());
      if (!canPromoteTake(p.value(), loadTakeState.availableValueList))
         return false;
   }

   // Otherwise removing the deallocation will drop any releases.  Check that
   // there is nothing preventing removal.
   TakePromotionState destroyAddrState(Releases);
   for (auto p : llvm::enumerate(Releases)) {
      auto *r = p.value();
      if (r == nullptr)
         continue;

      // We stash all of the destroy_addr that we see.
      if (auto *dai = dyn_cast<DestroyAddrInst>(r)) {
         destroyAddrState.initializeForTakeInst(p.index() /*destroyAddrIndex*/);
         // Make sure we can actually promote this destroy addr. If we can not,
         // then we must bail. In order to not gather available values twice, we
         // gather the available values here that we will use to promote the
         // values.
         if (!canPromoteTake(dai, destroyAddrState.availableValueList))
            return false;
         continue;
      }

      LLVM_DEBUG(llvm::dbgs()
                    << "*** Failed to remove autogenerated non-trivial alloc: "
                       "kept alive by release: "
                    << *r);
      return false;
   }

   // If we reached this point, we can promote all of our destroy_addr and load
   // take. Since our load [take] may be available values for our destroy_addr,
   // we promote the destroy_addr first.
   for (unsigned i : range(destroyAddrState.size())) {
      PILInstruction *dai;
      MutableArrayRef<AvailableValue> values;
      std::tie(dai, values) = destroyAddrState.getData(i);
      promoteDestroyAddr(cast<DestroyAddrInst>(dai), values);
      // We do not need to unset releases, since we are going to exit here.
   }
   for (unsigned i : range(loadTakeState.size())) {
      PILInstruction *li;
      MutableArrayRef<AvailableValue> values;
      std::tie(li, values) = loadTakeState.getData(i);
      promoteLoadTake(cast<LoadInst>(li), values);
   }

   LLVM_DEBUG(llvm::dbgs() << "*** Removing autogenerated non-trivial alloc: "
                           << *TheMemory);

   // If it is safe to remove, do it.  Recursively remove all instructions
   // hanging off the allocation instruction, then return success.  Let the
   // caller remove the allocation itself to avoid iterator invalidation.
   eraseUsesOfInstruction(TheMemory);

   return true;
}

bool AllocOptimize::optimizeMemoryAccesses() {
   bool changed = false;

   // If we've successfully checked all of the definitive initialization
   // requirements, try to promote loads.  This can explode copy_addrs, so the
   // use list may change size.
   for (unsigned i = 0; i != Uses.size(); ++i) {
      auto &use = Uses[i];
      // Ignore entries for instructions that got expanded along the way.
      if (use.Inst && use.Kind == PMOUseKind::Load) {
         if (auto *cai = dyn_cast<CopyAddrInst>(use.Inst)) {
            if (promoteCopyAddr(cai)) {
               Uses[i].Inst = nullptr; // remove entry if load got deleted.
               changed = true;
            }
            continue;
         }

         if (auto *lbi = dyn_cast<LoadBorrowInst>(use.Inst)) {
            if (promoteLoadBorrow(lbi)) {
               Uses[i].Inst = nullptr; // remove entry if load got deleted.
               changed = true;
            }
            continue;
         }

         if (auto *li = dyn_cast<LoadInst>(use.Inst)) {
            if (promoteLoadCopy(li)) {
               Uses[i].Inst = nullptr; // remove entry if load got deleted.
               changed = true;
            }
            continue;
         }
      }
   }

   return changed;
}

//===----------------------------------------------------------------------===//
//                           Top Level Entrypoints
//===----------------------------------------------------------------------===//

static AllocationInst *getOptimizableAllocation(PILInstruction *i) {
   if (!isa<AllocBoxInst>(i) && !isa<AllocStackInst>(i)) {
      return nullptr;
   }

   auto *alloc = cast<AllocationInst>(i);

   // If our aggregate has unreferencable storage, we can't optimize. Return
   // nullptr.
   if (getMemoryType(alloc).aggregateHasUnreferenceableStorage())
      return nullptr;

   // Otherwise we are good to go. Lets try to optimize this memory!
   return alloc;
}

static bool optimizeMemoryAccesses(PILFunction &fn) {
   bool changed = false;
   DeadEndBlocks deadEndBlocks(&fn);

   for (auto &bb : fn) {
      auto i = bb.begin(), e = bb.end();
      while (i != e) {
         // First see if i is an allocation that we can optimize. If not, skip it.
         AllocationInst *alloc = getOptimizableAllocation(&*i);
         if (!alloc) {
            ++i;
            continue;
         }

         LLVM_DEBUG(llvm::dbgs()
                       << "*** PMO Optimize Memory Accesses looking at: " << *alloc);
         PMOMemoryObjectInfo memInfo(alloc);

         // Set up the datastructure used to collect the uses of the allocation.
         SmallVector<PMOMemoryUse, 16> uses;
         SmallVector<PILInstruction *, 4> destroys;

         // Walk the use list of the pointer, collecting them. If we are not able
         // to optimize, skip this value. *NOTE* We may still scalarize values
         // inside the value.
         if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) {
            ++i;
            continue;
         }

         AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks);
         changed |= allocOptimize.optimizeMemoryAccesses();

         // Move onto the next instruction. We know this is safe since we do not
         // eliminate allocations here.
         ++i;
      }
   }

   return changed;
}

static bool eliminateDeadAllocations(PILFunction &fn) {
   bool changed = false;
   DeadEndBlocks deadEndBlocks(&fn);

   for (auto &bb : fn) {
      auto i = bb.begin(), e = bb.end();
      while (i != e) {
         // First see if i is an allocation that we can optimize. If not, skip it.
         AllocationInst *alloc = getOptimizableAllocation(&*i);
         if (!alloc) {
            ++i;
            continue;
         }

         LLVM_DEBUG(llvm::dbgs()
                       << "*** PMO Dead Allocation Elimination looking at: "
                       << *alloc);
         PMOMemoryObjectInfo memInfo(alloc);

         // Set up the datastructure used to collect the uses of the allocation.
         SmallVector<PMOMemoryUse, 16> uses;
         SmallVector<PILInstruction *, 4> destroys;

         // Walk the use list of the pointer, collecting them. If we are not able
         // to optimize, skip this value. *NOTE* We may still scalarize values
         // inside the value.
         if (!collectPMOElementUsesFrom(memInfo, uses, destroys)) {
            ++i;
            continue;
         }

         AllocOptimize allocOptimize(alloc, uses, destroys, deadEndBlocks);
         changed |= allocOptimize.tryToRemoveDeadAllocation();

         // Move onto the next instruction. We know this is safe since we do not
         // eliminate allocations here.
         ++i;
         if (alloc->use_empty()) {
            alloc->eraseFromParent();
            ++NumAllocRemoved;
            changed = true;
         }
      }
   }

   return changed;
}

namespace {

class PredictableMemoryAccessOptimizations : public PILFunctionTransform {
   /// The entry point to the transformation.
   ///
   /// FIXME: This pass should not need to rerun on deserialized
   /// functions. Nothing should have changed in the upstream pipeline after
   /// deserialization. However, rerunning does improve some benchmarks. This
   /// either indicates that this pass missing some opportunities the first time,
   /// or has a pass order dependency on other early passes.
   void run() override {
      // TODO: Can we invalidate here just instructions?
      if (optimizeMemoryAccesses(*getFunction()))
         invalidateAnalysis(PILAnalysis::InvalidationKind::FunctionBody);
   }
};

class PredictableDeadAllocationElimination : public PILFunctionTransform {
   void run() override {
      if (eliminateDeadAllocations(*getFunction()))
         invalidateAnalysis(PILAnalysis::InvalidationKind::FunctionBody);
   }
};

} // end anonymous namespace

PILTransform *polar::createPredictableMemoryAccessOptimizations() {
   return new PredictableMemoryAccessOptimizations();
}

PILTransform *polar::createPredictableDeadAllocationElimination() {
   return new PredictableDeadAllocationElimination();
}
